# coding: utf-8
import pandas as pd
import numpy as np
import os
from copy import deepcopy, copy
from joblib import load, dump
from sklearn.linear_model import LogisticRegression, Ridge
from sklearn.model_selection import GridSearchCV
from src.utils import benchmark_sample_action, benchmark_cal_reward_costs

####################################################################################
####################################################################################
############################## AREA OF INPUT PARAMETERS ############################
####################################################################################
####################################################################################

##### Environment Parameters
PATH = "."  # Root directory; should be the folder in which the project's "README.md" is located
PATH_DATA = f"{PATH}/data"  # Path for data
PATH_MODELS = f"{PATH}/models"  # Path for models

##### Parameters for Bandits
size_norm = 50000  # T, use 50000 to reproduce the results
budget = 1600  # Budget constraint; to reproduce, test with 1600 and 2200
random_seed = 1990  # To reproduce the results, keep seed as 1990

####################################################################################
####################################################################################
############# Create Output Path, Load the data and Conversion Model ###############
####################################################################################
####################################################################################

##### Load Data and Model
dt_init = pd.read_parquet(f"{PATH_DATA}/dt_env.parq")
model_conversion = load(f"{PATH_MODELS}/conversion_model.pkl")

# Artifacts of the static optimal policy for the chosen budget
policy_optim = load(f"{PATH_MODELS}/budget_{budget}/policy_optim_static.pkl")
dict_expect_optim_reward_costs = load(f"{PATH_MODELS}/budget_{budget}/dict_expect_optim_reward_costs.pkl")

##### Create the folder for the output model
# NOTE: the two loads above already read from this folder, so at this point it
# normally exists; `exist_ok=True` makes the call a harmless no-op in that case
# and avoids the check-then-create race of an explicit `os.path.isdir` test.
os.makedirs(f"{PATH_MODELS}/budget_{budget}", exist_ok=True)

####################################################################################
####################################################################################
############################# Prepare the data #####################################
####################################################################################
####################################################################################

##### Parameters of General Bandits
var_model = ["RISK_SCORE", "EDUCATION", "MARRIAGE", "AMOUNT_CLUSTER", "AGE_CLUSTER"]
# One-hot feature names, taken from the fitted conversion model so the column
# order matches what that model was trained on
var_model_onehot = list(model_conversion.feature_names_in_)
var_base_reward_costs = ["amount_norm", "discount_base_norm", "discount", "constant"]
# NOTE(review): -1 presumably encodes the "no offer" action and the remaining
# values are discount levels -- confirm against src.utils
list_actions = [-1] + [10, 20, 35, 55, 80]
# Position in `list_actions` -> action value
dict_mapping_actions = dict(enumerate(list_actions))

##### Prepare the data
# Base columns listed in `var_base_reward_costs` that are not in the raw data
dt_init["discount"] = 0
dt_init["constant"] = 1
# Seed numpy's global RNG before sampling (no `random_state` is passed to `sample`)
np.random.seed(random_seed)
dt_env = dt_init.sample(size_norm, replace=True).reset_index(drop = True)

# Attach the static optimal policy to every sampled row through its context id,
# keeping the row order of `dt_env` (left join)
policy_optim_env = \
    dt_env[["index_context"]].merge(policy_optim, how = "left", on = "index_context").reset_index(drop = True)

# Keep only the per-action columns, as a (rows x actions) array
policy_optim_env = policy_optim_env[list_actions].values

dt_env_with_optim = deepcopy(dt_env)
# NOTE(review): presumably draws one action per row from the policy columns -- see src.utils
dt_env_with_optim["optim_static_action"] = \
    benchmark_sample_action(policy_optim_env, dict_mapping_actions, seed = random_seed)

# NOTE: the result of this expression is discarded when the file runs as a
# script; it only displays something in an interactive session.
dt_env_with_optim["optim_static_action"].value_counts(normalize = True)

# NOTE(review): expected to add the "reward", "cost1", "cost2" and
# "conversion_optim_static_action" columns that are read further down -- confirm in src.utils
dt_env_with_optim = \
    benchmark_cal_reward_costs(dt_env_with_optim, model_conversion, budget, random_seed, apply_null = False)

# Expected vs. realized totals; each label is paired with the matching key/column
print(f"Expected Reward: {dict_expect_optim_reward_costs['expect_reward']}")
print(f"Expected Cost2: {dict_expect_optim_reward_costs['expect_cost2']}")
print(f"Expected Cost1: {dict_expect_optim_reward_costs['expect_cost1']}")
print("------------------------------")
print(f"Realized Reward: {np.cumsum(dt_env_with_optim['reward']).max()}")
print(f"Realized Cost2: {np.cumsum(dt_env_with_optim['cost2']).max()}")
print(f"Realized Cost1: {np.cumsum(dt_env_with_optim['cost1']).max()}")

####################################################################################
####################################################################################
########################## Hyperparameters Tuning ##################################
####################################################################################
####################################################################################

##### Hyperparameters Tuning for Logistic Bandits
# One-hot encode the environment, then keep only the model features and the
# base reward/cost columns (this frame is reused by the linear-bandit tuning).
dt_env_with_optim_dummy = pd.get_dummies(
    dt_env_with_optim, prefix_sep="_zl_"
)[var_model_onehot + var_base_reward_costs]

# Design matrix: one-hot features plus the explicit "constant" column, which
# stands in for the intercept (the estimator is fitted with fit_intercept=False).
X_logistic = dt_env_with_optim_dummy[var_model_onehot + ["constant"]].values
y_logistic = dt_env_with_optim["conversion_optim_static_action"].values

# Candidate values for C, scikit-learn's inverse regularization strength
grid_logistic = {"C": list(np.logspace(1, 3, 10))}
logreg = LogisticRegression(fit_intercept=False)
logreg_cv = GridSearchCV(
    logreg,
    grid_logistic,
    cv=5,
    scoring='neg_log_loss',
    verbose=1,
)
logreg_cv.fit(X=X_logistic, y=y_logistic)

print("tuned hpyerparameters :(best parameters) ", logreg_cv.best_params_)
print("logloss :", logreg_cv.best_score_)

##### Hyperparameters Tuning for Linear Bandits

def _tune_ridge_alpha(features, target):
    """Grid-search the Ridge penalty ``alpha`` for one target and print the outcome.

    Parameters
    ----------
    features : array-like
        Design matrix passed to ``GridSearchCV.fit`` as ``X``.
    target : array-like
        Values to regress on, passed to ``GridSearchCV.fit`` as ``y``.

    Returns
    -------
    GridSearchCV
        The fitted search; ``best_params_["alpha"]`` holds the selected penalty.
    """
    search = GridSearchCV(
        Ridge(fit_intercept=False),  # no intercept: the features carry a "constant" column
        {"alpha": np.logspace(-2, 0, 10)},
        cv=5,
        scoring='neg_mean_squared_error',
        verbose=1,
    )
    search.fit(X=features, y=target)

    print("tuned hpyerparameters :(best parameters) ", search.best_params_)
    print("neg_mean_squared_error :", search.best_score_)
    return search


# The same design matrix is shared by the reward model and both cost models
X_linear = dt_env_with_optim_dummy.values

### For reward model
ridge_reward_cv = _tune_ridge_alpha(X_linear, dt_env_with_optim["reward"].values)

### For cost2 model
ridge_cost2_cv = _tune_ridge_alpha(X_linear, dt_env_with_optim["cost2"].values)

### For cost1 model
ridge_cost1_cv = _tune_ridge_alpha(X_linear, dt_env_with_optim["cost1"].values)

### Get final hyperparameters for linear bandits
# The two cost penalties are averaged first and that average is then averaged
# with the reward penalty, so the reward alpha weighs 1/2 and each cost alpha 1/4.
alpha_reward = ridge_reward_cv.best_params_["alpha"]
alpha_costs = np.mean([ridge_cost1_cv.best_params_["alpha"],
                       ridge_cost2_cv.best_params_["alpha"]])
lmd_linear = np.mean([alpha_reward, alpha_costs])


# All values are rounded to 4 decimals before being stored
dict_hyper = {}
# Logistic penalty is the inverse of the selected C
dict_hyper["lmd_logistic"] = np.round(1/logreg_cv.best_params_["C"], 4)
dict_hyper["lmd_linear"] = np.round(lmd_linear, 4)
# Expected reward of the static optimal policy divided by the budget
dict_hyper["Z_linear"] = np.round(dict_expect_optim_reward_costs['expect_reward'] / budget, 4)

####################################################################################
####################################################################################
######################### Export Hyperparameters ###################################
####################################################################################
####################################################################################

print(dict_hyper)
dump(dict_hyper, f"{PATH_MODELS}/budget_{budget}/dict_hyper.pkl")

